/***
This figure plots a time series of students' educational progress on the Zearn
Math online platform.
***/

*-------------------------------------------------------------------------------
* Set up
*-------------------------------------------------------------------------------

* Set $root
* When r(buildrunning) is 0 after the project call, the interactive
* configuration file is included as well.
project figstabs, root
if (r(buildrunning)==0) include "${root}/code/config_interactive.do"

* Set globals
* set_globals.do is registered as a dependency and then included.
project, uses("${root}/code/set_globals.do")
include "${root}/code/set_globals.do"
local category "Education"

* Create required subfolders
* Every folder this script writes into is created up front. -cap- swallows
* the error raised when a folder already exists.
cap mkdir "${root}/results/Education"
cap mkdir "${root}/results/paper numbers"
cap mkdir "${root}/results/paper numbers/`category'"
* The black and white figure is exported into this folder further down;
* graph export does not create missing directories.
cap mkdir "${root}/results/QJE_Figures_BlackAndWhite"

*-------------------------------------------------------------------------------
* Load and process data
*-------------------------------------------------------------------------------

* Read the national weekly Zearn file (registered as a build dependency first)
project, uses("${root}/data/web/data/Zearn - National - Weekly.csv")
import delimited "${root}/data/web/data/Zearn - National - Weekly.csv", clear

* Build a daily date from the year, month and end-of-week day columns
gen date = mdy(month, day_endofweek, year)
format date %td

* Restrict the sample to 8 Jan 2020 through 12 Dec 2021, endpoints included
drop if !inrange(date, td(08jan2020), td(12dec2021))

* Number the remaining rows consecutively in date order
sort date
gen week_2020 = _n

* Where an income-group series is missing, fill it from its break_ counterpart
foreach outcome in engagement badges {
	foreach grp in inclow incmiddle inchigh {
		replace `outcome'_`grp' = break_`outcome'_`grp' if missing(`outcome'_`grp')
	}
}

* Retain the week index, the date and the two families of outcome series
keep week_2020 date engagement_* badges_*

* Give each income group a numeric suffix so the series can be reshaped:
* low -> 1, middle -> 0, high -> 2
foreach outcome in engagement badges {
	rename (`outcome'_inclow `outcome'_incmiddle `outcome'_inchigh) ///
		(`outcome'_1 `outcome'_0 `outcome'_2)
}

* Go long: one row per week and income group, with the group in nation_poor
reshape long engagement_ badges_, i(week_2020 date) j(nation_poor)

* Drop the trailing underscore left over from the reshape stubs
rename (engagement_ badges_) (engagement badges)

* The high-income group is coded -1 rather than 2
replace nation_poor = -1 if nation_poor == 2

* Remove the summer weeks. The two weeks following each summer are kept
* (and blanked below) so that an empty stretch remains in the series.
drop if inrange(week_2020, 18, 34) // May 10 to Aug 30 2020
drop if inrange(week_2020, 70, 86) // summer 2021

* Remove the first winter-break week (50 = Dec 20). Weeks 51 (Dec 27) and
* 52 (Jan 3) are kept and blanked below.
drop if week_2020 == 50

* Blank both outcomes in the placeholder weeks:
*    summer bar: 35 and 36 are Sept 6 and Sept 13 2020, 87 and 88 are 2021
*    winter bar: 51 and 52
foreach outcome in engagement badges {
	replace `outcome' = . if inlist(week_2020, 35, 36, 87, 88)
	replace `outcome' = . if inlist(week_2020, 51, 52)
}

* Constant helper columns. NOTE(review): neither variable is referenced
* anywhere else in the visible part of this script - confirm whether they
* are still needed.
generate upper_engagement = 0.2
generate upper_badge = 0.6

* Adjust the weeks to fix the gap issue
* The statements below re-index week_2020 so that the weeks dropped earlier
* leave no holes in the x axis. Their order matters: each threshold is
* expressed in the numbering produced by the statement before it.
* Step 1: original weeks 18-34 were dropped, so weeks 35 and later move
* down by 17 (35 becomes 18).
replace week_2020 = week_2020 - 17 if week_2020 >= 35
* Step 2: original week 50 was dropped and would now sit at 33, so
* everything currently at 34 or later moves down by one more.
replace week_2020 = week_2020 - 1 if week_2020 >= 34

* Do same for 2021 summer gap
* Original weeks 70-86 were dropped. After the two shifts above the first
* week following that gap sits at 69, so it and all later weeks move down by 17.
replace week_2020 = week_2020 - 17 if week_2020 >= 69

* Replace Thanksgiving with values before and after
* Rows are ordered by income group and then date, so _n-1 and _n+1 refer to
* the neighbouring weeks of the same group (assumes each group has one row
* per remaining week - TODO confirm). Weeks 30 and 64 take the value of the
* preceding row, weeks 31 and 65 the value of the following row. Only
* engagement is patched here; badges is left untouched.
sort nation_poor date
replace engagement = engagement[_n-1] if week_2020 == 30
replace engagement = engagement[_n+1] if week_2020 == 31
replace engagement = engagement[_n-1] if week_2020 == 64
replace engagement = engagement[_n+1] if week_2020 == 65

* Two-line value labels (date above year) for selected values of the
* re-indexed week variable. The {space N} padding nudges each label left or
* right of its tick. Every entry uses the same compound-quote layout; the
* entry for week 20 previously ended in a stray double quote.
* A label for week 61 (Oct 31 2021) was commented out in an earlier version
* and is still not defined.
label define weeks_label ///
	1 `" "{space 7}Jan 12" "{space 4}2020" "' ///
	17 `" "May 3{space 7}" "2020{space 5}" "' ///
	20 `" "{space 8}Sep 20" "{space 5}2020" "' ///
	32 `" "Dec 13{space 8}" "2020{space 5}" "' ///
	35 `" "{space 7}Jan 10" "{space 4}2021" "' ///
	51 `" "May 2{space 8}" "2021{space 6}" "' ///
	54 `" "{space 8}Sep 19" "{space 5}2021" "' ///
	66 `" "Dec 12{space 8}" "2021{space 5}" "'

label values week_2020 weeks_label

* NOTE(review): the local x is set here but is not referenced again in the
* visible part of this script.
local x engagement

* Engagement in percentage points, used for the text of the marker labels
gen toshow = engagement
replace toshow = 100 * toshow

* Label for the last week present in the data
summarize week_2020
local week `r(max)'
gen toshow2 = string(toshow, "%8.1f") + "%" if week_2020 == `week'

* Labels for the last week before each break: 17 -> toshow3, 32 -> toshow4,
* 51 -> toshow5
local suffix 3
foreach wk of numlist 17 32 51 {
	gen toshow`suffix' = string(toshow, "%8.1f") + "%" if week_2020 == `wk'
	local ++suffix
}

* Collapse the candidates into one label variable (first non-empty wins),
* then pad with trailing space so the text sits left of the point
gen toshow_final = toshow2
forvalues k = 3/5 {
	replace toshow_final = toshow`k' if toshow_final == ""
}
replace toshow_final = toshow_final + "{space 10}"

* Vertical anchor positions for the labels, nudged upwards in the labelled weeks
gen engagement_shifted_q4 = engagement + 0.03 * (week_2020 == 17) + 0.055 * (week_2020 == 51)
gen engagement_shifted_q1 = engagement + 0.02 * (week_2020 == 17) + 0.01 * (week_2020 == 32) + 0.01 * (week_2020 == 51)

* Only the first 17 weeks are plotted
drop if week_2020 > 17

* x position for the value labels, slightly left of the data point
gen week_2020_scatter = week_2020 - 0.05

* Students: colour version.
* Plots 1-2 draw the engagement lines for the high-income (-1) and
* low-income (1) groups; plots 3-4 are invisible markers that only carry the
* value labels.
twoway ///
	(connected engagement week_2020 if nation_poor == -1, ///
		msymbol(none) cmissing(n) color(oi2)) ///
	(connected engagement week_2020 if nation_poor == 1, ///
		msymbol(none) cmissing(n) color(oi1)) ///
	(scatter engagement_shifted_q4 week_2020_scatter if nation_poor == -1, ///
		mlabel(toshow_final) mlabcolor(oi2) mlabposition(12) mlabsize(3) msymbol(none)) ///
	(scatter engagement_shifted_q1 week_2020_scatter if nation_poor == 1, ///
		mlabel(toshow_final) mlabcolor(oi1) mlabposition(12) mlabsize(3) msymbol(none)) ///
	, ///
	xsize(8) ///
	legend(off) ///
	xtitle("") ///
	ytitle("Students Using Zearn Platform (%)" "Relative to January 2020", size(3)) ///
	xlabel(1 `""Jan 12""2020""' ///
		3 "Jan 26" ///
		5 "Feb 9" ///
		7 "Feb 23" ///
		9 "Mar 8" ///
		11 "Mar 22" ///
		13 "Apr 5" ///
		15 "Apr 19" ///
		17 "May 3", labsize(small)) ///
	ylabel(-0.6 "-60%" -0.4 "-40%" -0.2 "-20%" 0 "0%" 0.2 "+20%", nogrid) ///
	yline(0, lcolor(gs8) lpattern(dash)) ///
	text(0.15 15 "{bf:Top Income Quartile}", color(oi2) size(*.75)) ///
	text(-0.55 15 "{bf:Bottom Income Quartile}", color(oi1) size(*.75))

oi_graph_export "${root}/results/Education/Education - Educational Progress by income", type(${fig_type})

* Greyscale version of the same figure for QJE: gs8 stands in for the
* high-income group (-1) and gs0 for the low-income group (1)
twoway ///
	(connected engagement week_2020 if nation_poor == -1, ///
		msymbol(none) cmissing(n) color(gs8)) ///
	(connected engagement week_2020 if nation_poor == 1, ///
		msymbol(none) cmissing(n) color(gs0)) ///
	(scatter engagement_shifted_q4 week_2020_scatter if nation_poor == -1, ///
		mlabel(toshow_final) mlabcolor(gs8) mlabposition(12) mlabsize(3) msymbol(none)) ///
	(scatter engagement_shifted_q1 week_2020_scatter if nation_poor == 1, ///
		mlabel(toshow_final) mlabcolor(gs0) mlabposition(12) mlabsize(3) msymbol(none)) ///
	, ///
	xsize(8) ///
	legend(off) ///
	xtitle("") ///
	ytitle("Students Using Zearn Platform (%)" "Relative to January 2020", size(3)) ///
	xlabel(1 `""Jan 12""2020""' ///
		3 "Jan 26" ///
		5 "Feb 9" ///
		7 "Feb 23" ///
		9 "Mar 8" ///
		11 "Mar 22" ///
		13 "Apr 5" ///
		15 "Apr 19" ///
		17 "May 3", labsize(small)) ///
	ylabel(-0.6 "-60%" -0.4 "-40%" -0.2 "-20%" 0 "0%" 0.2 "+20%", nogrid) ///
	yline(0, lcolor(gs8) lpattern(dash)) ///
	text(0.15 15 "{bf:Top Income Quartile}", color(gs8) size(*.75)) ///
	text(-0.55 15 "{bf:Bottom Income Quartile}", color(gs0) size(*.75))

* Write the SVG and register it as an output of this script
graph export "${root}/results/QJE_Figures_BlackAndWhite/Figure_10.svg", replace
project, creates("${root}/results/QJE_Figures_BlackAndWhite/Figure_10.svg")

* Numbers quoted in the paper: absolute value of the week-17 engagement
* change (in percentage points, no decimals) for each income group
summarize toshow if nation_poor == -1 & week_2020 == 17
local summer_20_q4 : display %2.0f abs(r(mean))

summarize toshow if nation_poor == 1 & week_2020 == 17
local summer_20_q1 : display %2.0f abs(r(mean))

*-------------------------------------------------------------------------------
* Export output numbers to YAML file
*-------------------------------------------------------------------------------

local yaml_file "${root}/results/paper numbers/`category'/Effects of COVID on Educational Progress by Income Group.yaml"

* Remove any previous copy of the file before writing the entries
capture erase "`yaml_file'"

* One entry per income quartile, Q4 first and then Q1
foreach q in 4 1 {
	yamlout using "`yaml_file'", ///
		key("zearn_q`q'_may20") ///
		comment("Students Using Zearn Platform Relative to Jan 2020 - Income Q`q' - May 3 2020") ///
		value(`summer_20_q`q'') fmt(%2.0f)
}

project, creates("`yaml_file'")
